clear all

// Purpose of this do-file: merge population and election data at the
// county-sldu level.

// Work from the output folder; the population file is opened relative to it.
cd "$OutputPath/"
use county_sldu_pop, clear

// statastates is a user-written command keyed here on fstate (f() is presumably
// its fips() option -- confirm). It leaves a _merge indicator and a state_name
// variable behind; only rows matched on both sides are retained.
statastates, f(fstate)
keep if _merge == 3
drop _merge

// Store the state name in proper case under the shorter variable name state.
rename state_name state
replace state = strproper(state)

// Snapshot the cleaned population data; later steps reload it from this tempfile.
tempfile base
save `base', replace

// Replace the year==2007 rows of selected states with relabelled copies of
// their year==2012 rows (year set to 2007, censusyear set to 2000).
// Rationale recorded by the original author:
//   - use 2010 maps for states that did not participate in 2000 but submitted
//     usable pre-2010 plans (and match with turnout data)
//   - Iowa and Missouri: replace the post-2000 map with the pre-2010 map;
//     matches 2004 elections
//   - Alaska: replace the 2007 map with the 2012 map; supreme court intervened
//
// The state list is defined exactly once so that the rows copied from 2012 and
// the rows removed from 2007 can never get out of sync. inlist() accepts at
// most 10 arguments when they are strings, hence the three separate calls.
local remap_states inlist(state, "Alaska", "Arkansas", "California", "Florida", "Hawaii", "Kentucky", "Maine", "Pennsylvania", "Tennessee") ///
	| inlist(state, "Wisconsin", "Maryland", "Minnesota", "Montana", "Nebraska", "New Jersey", "New Hampshire", "Oregon", "Vermont") ///
	| inlist(state, "Iowa", "Missouri")

// Step 1: isolate the 2012 rows of those states and relabel them.
keep if (`remap_states') & year == 2012

replace censusyear = 2000
replace year = 2007

tempfile replace2000
save `replace2000', replace

// Step 2: reload the full data, remove the rows being superseded, and add the
// relabelled copies in their place.
use `base', clear

drop if (`remap_states') & year == 2007

append using `replace2000'


// Recode sldu so that it corresponds to the district codes used in the
// elections_vtd.do data.
// Copies of the raw district label and of the state abbreviation are created
// first (presumably for use inside slducodes.do -- confirm).
gen slduname = sldu
gen sab = state_ab

// slducodes.do replaces string district labels with codes.
do "$CodePath/elections_csld_edits/slducodes.do"

// Convert the recoded district identifier to a numeric variable.
destring sldu, replace

// Remove rows without a district code (DC, per the original note) and all
// Nebraska rows.
drop if sldu == . | state_a == "NE"

// Checkpoint the result under a fresh temporary file, again referred to as base.
tempfile base
save `base', replace

// Stack the two election files and match them to the county-district
// population data saved in the base tempfile.
cd "$OutputPath/"
use elections_csldu.dta, clear
append using elections_csldu_2014.dta

// Nebraska is excluded here as well, mirroring the population side.
drop if state_a == "NE"

// One row per year x state x county x district on both sides; m_elec records
// the match status.
merge 1:1 year fstate fcounty sldu using `base', gen(m_elec)

	/* Merge diagnostics (kept for reference; nothing in this block is executed)
	* m_elec=2: no turnout data available (coverage improves over time), or the
	*           election was uncontested (treated as missing if no candidates on ballot)
	* m_elec=1: turnout data with no corresponding area in the Census. 1.3% of
	*           observations. Some are errors (sldu==0), some are absentee votes,
	*           which are not assigned to a county (fcounty==.)

	gen true_error=m_elec==1 & fcounty!=. & sldu!=. & sldu!=0  // 3.27%  of observations -- only 1 or 2 per state, except MD and SD, 2017

	egen error_rate=mean(true_error), by(fstate year)  //90th percentile is 0.7%, 95th percentile is 15% errors


	// the above validation disqualifies several maps for the upper house, for which no alternative block-assignment files were available, to my knowledge
	drop if m_elec==1
	tab year state_a if error_rate>.15

	NOTE(review): because "drop if m_elec==1" sits inside this comment, rows with
	m_elec==1 are NOT dropped at this point -- confirm that this is intended.
	*/

	// Exclude the state-year maps listed below (the diagnostics above appear to be
	// the basis for this list -- confirm).
	drop if (state_a == "MO" & year == 2007) ///
		| (state_a == "SD" & year == 2017) ///
		| (inlist(state_a, "WI", "NC") & inlist(year, 2007, 2012))


// Overwrite the checkpoint with the merged population + election data.
save `base', replace


// Attach the upper-chamber legislator file to the merged county-district data
// and write the final dataset.
use "$OutputPath/statelegislators_upper.dta", clear

// append using statelegislators2014   (left disabled, as in the original)

// Each legislator row (state x district x year) may match several county rows.
merge 1:m fstate sldu year using `base', gen(m_leg)

// Discard legislator rows that have no counterpart in the county-district data.
// NOTE(review): the original comment here read "m_elec=2 because of uncontested
// elections"; it presumably refers to the unmatched m_leg==2 rows that are
// kept -- confirm.
drop if m_leg == 1

// Harmonize state-level variables: overwrite each one with its mean taken over
// all rows of the same state, which also fills rows where it was missing.
foreach v of varlist totaldistricts totaldistricts_senate termlength {
	tempvar state_avg
	egen `state_avg' = mean(`v'), by(state)
	replace `v' = `state_avg'
	drop `state_avg'
}

// Correct nmember: set to 2 for every West Virginia row.
replace nmember = 2 if state == "West Virginia"

// Rows without a county code are removed before saving.
drop if fcounty == .


cd "$OutputPath/"
save county_sldu_pop_elec, replace
